// ===================================================================
//
// Data to create technology gap distribution
//
// ===================================================================

* pat63_02f.dta is the updated data file from the NBER Patent Data Project.
* It can be downloaded from http://elsa.berkeley.edu/~bhhall/patents.html

* ---- US patents: counts per (application year, IPC4 class) cell ------------
* Output file patUS.dta holds one row per (appyear, ipc4) with
*   kus   = number of US patent records in the cell
*   kusf  = number of distinct assignee values in the cell
*   kusf0 = number of assignee==0 rows left after de-duplicating assignees
*           (so 0 or 1 per cell)
use "pat63_02f.dta", clear

* The four row filters expressed as one condition: US origin, IPC4 code that
* starts with a capital letter, non-missing application year, non-empty IPC4.
keep if country == "US" & regexm(ipc4, "^[A-Z]") & appyear != . & ipc4 != ""

* Patent count per cell, taken before rows are reduced to one per assignee.
by appyear ipc4, sort: generate kus = _N

* One row per (cell, assignee); row counts from here on are assignee counts.
by appyear ipc4 assignee, sort: drop if _n > 1
by appyear ipc4, sort: egen kusf0 = total(assignee == 0)
by appyear ipc4, sort: generate kusf = _N

* Reduce to one row per cell and save, sorted on the merge key.
by appyear ipc4, sort: drop if _n > 1
keep appyear ipc4 kus kusf kusf0
sort appyear ipc4
save "patUS.dta", replace

* ---- Foreign patents: counts per (application year, IPC4 class) cell -------
* After this block the data in memory hold one row per (appyear, ipc4) found
* in either source, with kfrgn (foreign patent count) from the master data,
* kus / kusf / kusf0 from patUS.dta, and the _merge indicator.
use "pat63_02f.dta", clear

* Keep records from the six comparison countries that have a non-missing
* application year and an IPC4 code starting with a capital letter.
* The country list already rules out blank countries, and the regular
* expression already rules out blank IPC4 codes, so no separate blank
* checks are needed.
keep if inlist(country, "JP", "GB", "DE", "FR", "CA", "IT")
keep if regexm(ipc4, "^[A-Z]")
drop if appyear == .
keep appyear ipc4 assignee

* kfrgn = number of foreign patent records in the cell; then one row per cell.
bys appyear ipc4: gen kfrgn = _N
bys appyear ipc4: keep if _n == 1
sort appyear ipc4

* Old-style match merge: both datasets must be sorted on the key. The master
* data are sorted just above; patUS.dta is saved sorted by appyear ipc4.
merge appyear ipc4 using patUS.dta

* patUS.dta is only an intermediate file; remove it once merged.
erase patUS.dta

* ---- Pick the application year and aggregate to IPC4 level -----------------
keep if appyear == 1975 // 1981, 1995 -- change here for relevant years

* The merge indicator is not needed any more.
drop _merge

* Cells present in only one of the two sources have a missing count in the
* other; treat those as zero patents.
foreach v of varlist kfrgn kus {
    replace `v' = 0 if `v' == .
}

* Sum the counts by IPC4 class (collapse's sum treats missing values as zero).
keep appyear ipc4 kfrgn kus kusf kusf0
collapse (sum) kus kfrgn kusf kusf0, by(ipc4)

* ktot = US + foreign patents; keep only classes with at least 10 patents.
generate ktot = kus + kfrgn
keep if ktot >= 10


* ---- US share per IPC4 class and its binned frequency distribution ---------
* us_rat = US share of the class's patents, in percent.
generate us_rat = 100*kus/ktot
sort us_rat

* Bins of width 100/33 percentage points; a class with us_rat == 0 gets bin 0.
generate bin = ceil(us_rat/(100/33))

* N_bin = number of classes in the bin; freq = that number as a percentage
* of all classes, rounded to three decimals.
by bin, sort: generate N_bin = _N
generate freq = N_bin/_N
replace freq = round(10^5*freq)/10^3

* Bin-level totals of US patents and of the US assignee counts.
by bin, sort: egen kus_tot = total(kus)
by bin, sort: egen kusf_tot = total(kusf)
by bin, sort: egen kusf0_tot = total(kusf0)

* US patents per counted US assignee within the bin.
generate kus_mean = kus_tot/kusf_tot

* ---- Histogram and kernel-smoothed distribution ----------------------------
* bin2 is the bin index shifted by 17; it is not used by the commands below.
gen bin2 = bin-17

* Histogram of the US share (33 bins, in percent) with a kernel density overlay.
histogram us_rat, bin(33) start(-16) kdensity kdenopts(bwidth(1.8)) percent name(gr1,replace)

* Evaluation grid 1..33, held in the first 33 observations of varx.
gen varx = _n in 1/33
cap drop kdx kdy

* Gaussian kernel density of the bin index evaluated on that grid; the grid
* points and density estimates are stored in kdx and kdy.
kdensity bin, kernel(gaussian) at(varx) bwidth(1.8) generate(kdx kdy) name(gr2,replace) // **

* Stop the do-file here so that kdx / kdy can be read off while the data are
* intact: the commands after this point reduce the data to one row per bin.
* Run those commands by hand if the frequency check is needed.
exit
* NOTE(review): the statement just above this block appears to halt a full
* do-file run, so these lines are presumably meant to be run by hand -- confirm.
* Reduce to one row per bin, then tot = sum of the bin frequencies (percent).
by bin, sort: drop if _n > 1
egen tot = total(freq)

** kdy gives the smoothed distribution (rescale it so that its values sum to 1).
** kdy is used to obtain the columns in us_rat_smooth.mat.


